First step
The ALICIA - Concytec page was used to search for undergraduate theses with the following search strategy:
(“Análisis Factorial” OR “Análisis de Componentes Principales” OR “ACP” OR “confiabilidad” OR “fiabilidad” OR “análisis psicométrico” OR “baremación”) OR ((“validez” OR “validación” OR “adaptación” OR “construcción” OR “estandarización”) AND (“escala” OR “batería de pruebas” OR “prueba psicológica” OR “instrumento” OR “cuestionario” OR “test”))
In addition, the following filters were used:
- Year from 2011 to 2020
- Undergraduate thesis
- Open Access thesis
All of this is encoded in a static link, which is used to inspect the results and store the data in an Excel file.
library(rvest)
library(tidyverse)
# Static search URL for ALICIA (VuFind). The query string carries the search
# expression (`lookfor`), the bachelor-thesis and open-access filters, and the
# 2011-2020 publication date range.
url <- "https://alicia.concytec.gob.pe/vufind/Search/Results?filter%5B%5D=format%3A%22bachelorThesis%22&filter%5B%5D=eu_rights_str_mv%3A%22openAccess%22&lookfor=%28%E2%80%9CAn%C3%A1lisis+Factorial%E2%80%9D+OR+%E2%80%9CAn%C3%A1lisis+de+Componentes+Principales%E2%80%9D+OR+%E2%80%9CACP%E2%80%9D+OR+%E2%80%9Cconfiabilidad%E2%80%9D+OR+%E2%80%9Cfiabilidad%E2%80%9D+OR+%E2%80%9Can%C3%A1lisis+psicom%C3%A9trico%E2%80%9D+OR+%22baremaci%C3%B3n%22%29+OR+%28%28%E2%80%9Cvalidez%E2%80%9D+OR+%22validaci%C3%B3n%22+OR+%22adaptaci%C3%B3n%22+OR+%22construcci%C3%B3n%22+OR+%22estandarizaci%C3%B3n%22%29+AND+%28%22escala%22+OR+%E2%80%9Cbater%C3%ADa+de+pruebas%E2%80%9D+OR+%E2%80%9Cprueba+psicol%C3%B3gica%E2%80%9D+OR+%22instrumento%22+OR+%22cuestionario%22+OR+%22test%22%29%29&type=AllFields&daterange%5B%5D=publishDate&publishDatefrom=2011&publishDateto=2020"
# Download and parse the results page found at `url`.
dina_html <- read_html(url)
Determine the number of pages
[1] "811"
Loop to read all theses
Read the HTML of each thesis
# Download and parse the page of every thesis listed in
# `complete_href_dina$tesis_url`. Results are stored in `read_html_thesis`,
# one slot per row, named by the URL that was read.
read_html_thesis <- vector("list", nrow(complete_href_dina))

for (i in seq_len(nrow(complete_href_dina))) {
  # Skip URLs that already have a named entry, so a repeated URL is not
  # downloaded twice; the slot of a skipped row stays NULL.
  if (!(complete_href_dina$tesis_url[i] %in% names(read_html_thesis))) {
    cat(paste("Doing thesis number", i, "..."))
    ok <- FALSE
    counter <- 0
    # `counter` is incremented before each request, so `counter <= 10` allows
    # up to 11 attempts per URL. `&&` is used because both operands are
    # scalars and the second need not be evaluated once `ok` is TRUE.
    while (!ok && counter <= 10) {
      counter <- counter + 1
      out <- tryCatch(
        read_html(complete_href_dina$tesis_url[i]),
        error = function(e) {
          # Pause before the next attempt, then hand the condition back so the
          # failure can be detected below.
          Sys.sleep(2)
          e
        }
      )
      if (inherits(out, "error")) {
        cat(".")
      } else {
        ok <- TRUE
        cat("Successful!")
      }
    }
    cat("\n")
    if (!ok) {
      # Every attempt failed: `out` holds the last error condition, not a
      # parsed page. Report it instead of letting it pass silently.
      warning(
        "Could not read thesis number ", i, " (",
        complete_href_dina$tesis_url[i], "): ", conditionMessage(out),
        call. = FALSE
      )
    }
    read_html_thesis[[i]] <- out
    names(read_html_thesis)[i] <- complete_href_dina$tesis_url[i]
  }
}
Remove duplicates
The table has 16211 rows at the moment.
# Keep only the first row for each title. Titles are compared in upper case so
# that differences in letter case do not count as distinct theses.
thesis_final <- filter(
  thesis_information,
  !duplicated(str_to_upper(Titulo))
)
Now, it has 15150 rows.
Export to XLSX
# Export the de-duplicated table (`thesis_final`) rather than
# `thesis_information`, which still contains the repeated titles.
openxlsx::write.xlsx(
  thesis_final,
  "Table complet thesis psychometric.xlsx"
)
LS0tCnRpdGxlOiAiSS4gV2ViIFNjcmFwcGluZyBvZiBESU5BIC0gQ29uY3l0ZWMiCmF1dGhvcjogIkJyaWFuIE4uIFBlw7FhLUNhbGVybyIKZGF0ZTogIjIzLzUvMjAyMSIKb3V0cHV0OiAKICBodG1sX25vdGVib29rOiAKICAgIG51bWJlcl9zZWN0aW9uczogeWVzCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIGhpZ2hsaWdodDoga2F0ZQogICAgdGhlbWU6IGZsYXRseQplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCiMgRmlyc3Qgc3RlcAoKVGhlIFtBTElDSUEgLSBDb25jeXRlYyBwYWdlXShodHRwczovL2FsaWNpYS5jb25jeXRlYy5nb2IucGUpIHdhcyB1c2VkIHRvIHNlYXJjaCBmb3IgdW5kZXJncmFkdWF0ZSB0aGVzZXMgd2l0aCB0aGUgZm9sbG93aW5nIHNlYXJjaCBzdHJhdGVneToKCj4gKCJBbsOhbGlzaXMgRmFjdG9yaWFsIiBPUiAiQW7DoWxpc2lzIGRlIENvbXBvbmVudGVzIFByaW5jaXBhbGVzIiBPUiAiQUNQIiBPUiAiY29uZmlhYmlsaWRhZCIgT1IgImZpYWJpbGlkYWQiIE9SICJhbsOhbGlzaXMgcHNpY29tw6l0cmljbyIgT1IgImJhcmVtYWNpw7NuIikgT1IgKCgidmFsaWRleiIgT1IgInZhbGlkYWNpw7NuIiBPUiAiYWRhcHRhY2nDs24iIE9SICJjb25zdHJ1Y2Npw7NuIiBPUiAiZXN0YW5kYXJpemFjacOzbiIpIEFORCAoImVzY2FsYSIgT1IgImJhdGVyw61hIGRlIHBydWViYXMiIE9SICJwcnVlYmEgcHNpY29sw7NnaWNhIiBPUiAiaW5zdHJ1bWVudG8iIE9SICJjdWVzdGlvbmFyaW8iIE9SICJ0ZXN0IikpCgpJbiBhZGRpdGlvbiwgdGhlIGZvbGxvd2luZyBmaWx0ZXJzIHdlcmUgdXNlZDoKCi0gWWVhciBmcm9tIDIwMTEgdG8gMjAyMAotIFVuZGVyZ3JhZHVhdGUgdGhlc2lzCi0gT3BlbiBBY2Nlc3MgdGhlc2lzCgpBbGwgdGhpcyBpcyBrZXB0IGluIGEgc3RhdGljIGxpbmsgdGhhdCB3aWxsIGJlIHVzZWQgdG8gaW5zcGVjdCBhbmQgc3RvcmUgdGhlIGRhdGEgaW4gYW4gZXhjZWwgZmlsZS4KCmBgYHtyfQpsaWJyYXJ5KHJ2ZXN0KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCnVybCA8LSAiaHR0cHM6Ly9hbGljaWEuY29uY3l0ZWMuZ29iLnBlL3Z1ZmluZC9TZWFyY2gvUmVzdWx0cz9maWx0ZXIlNUIlNUQ9Zm9ybWF0JTNBJTIyYmFjaGVsb3JUaGVzaXMlMjImZmlsdGVyJTVCJTVEPWV1X3JpZ2h0c19zdHJfbXYlM0ElMjJvcGVuQWNjZXNzJTIyJmxvb2tmb3I9JTI4JUUyJTgwJTlDQW4lQzMlQTFsaXNpcytGYWN0b3JpYWwlRTIlODAlOUQrT1IrJUUyJTgwJTlDQW4lQzMlQTFsaXNpcytkZStDb21wb25lbnRlcytQcmluY2lwYWxlcyVFMiU4MCU5RCtPUislRTIlODAlOUNBQ1AlRTIlODAlOUQrT1IrJUUyJTgwJTlDY29uZmlhYmlsaWRhZCVFMiU4MCU5RCtPUislRTIlODAlOUNmaWFiaWxpZGFkJUUyJTgwJTlEK09SKyVFMiU4MCU5Q2FuJUMzJUExbGlzaXMrcHNpY29tJUMzJUE5dHJpY28lRTIlODAlOUQrT1IrJTIyYmFyZW1hY2klQzMlQjNuJTIyJTI5K09SKyUyOCUyOCVF
MiU4MCU5Q3ZhbGlkZXolRTIlODAlOUQrT1IrJTIydmFsaWRhY2klQzMlQjNuJTIyK09SKyUyMmFkYXB0YWNpJUMzJUIzbiUyMitPUislMjJjb25zdHJ1Y2NpJUMzJUIzbiUyMitPUislMjJlc3RhbmRhcml6YWNpJUMzJUIzbiUyMiUyOStBTkQrJTI4JTIyZXNjYWxhJTIyK09SKyVFMiU4MCU5Q2JhdGVyJUMzJUFEYStkZStwcnVlYmFzJUUyJTgwJTlEK09SKyVFMiU4MCU5Q3BydWViYStwc2ljb2wlQzMlQjNnaWNhJUUyJTgwJTlEK09SKyUyMmluc3RydW1lbnRvJTIyK09SKyUyMmN1ZXN0aW9uYXJpbyUyMitPUislMjJ0ZXN0JTIyJTI5JTI5JnR5cGU9QWxsRmllbGRzJmRhdGVyYW5nZSU1QiU1RD1wdWJsaXNoRGF0ZSZwdWJsaXNoRGF0ZWZyb209MjAxMSZwdWJsaXNoRGF0ZXRvPTIwMjAiCgpkaW5hX2h0bWwgPC0gcmVhZF9odG1sKHVybCkgCmBgYAoKIyMgRGV0ZXJtaW5hdGUgdGhlIG51bWJlciBvZiBwYWdlcwoKCmBgYHtyfQpsYXN0X25fcGFnZSA8LSBkaW5hX2h0bWwgJT4lIAogIGh0bWxfZWxlbWVudHMoIi5wYWdpbmF0aW9uIGxpOmxhc3QtY2hpbGQiKSAlPiUgCiAgaHRtbF90ZXh0MigpICU+JSAKICBzdHJfZXh0cmFjdCgiWzAtOV0rIikKCmxhc3Rfbl9wYWdlCmBgYAoKIyMgTG9vcCBmb3IgZXh0cmFjdCBsaW5rcyBpbiBldmVyeSBwYWdlcyBhdmFpYmxlCgpgYGB7cn0Kb3B0aW9ucyh0aW1lb3V0PSAxZSsxMCkKYGBgCgpgYGB7cn0KZ3JvdXBfcGFnZXMgPC0gc3BsaXQoc2VxX2xlbihsYXN0X25fcGFnZSksIGNlaWxpbmcoc2VxX2xlbihsYXN0X25fcGFnZSkvNTApKQoKdG1wIDwtIGxpc3QoKQpocmVmX2RpbmEgPC0gbGlzdCgpCgpmb3IgKGkgaW4gc2VxX2Fsb25nKGdyb3VwX3BhZ2VzKSkgewogIGZvciAoaiBpbiBncm91cF9wYWdlc1tbaV1dKSB7CiAgICB0bXBbW2pdXSA8LSBwYXN0ZTAodXJsLCAiJnBhZ2U9IiwgaikKCiAgICBocmVmX2RpbmFbW2pdXSA8LSB0bXBbW2pdXSAlPiUKICAgICAgcmVhZF9odG1sKCkgJT4lCiAgICAgIGh0bWxfZWxlbWVudHMoIi5yZXN1bHQgLnJvdyAubGluayBhIikgJT4lCiAgICAgIGh0bWxfYXR0cnMoKSAlPiUKICAgICAgdW5saXN0KCkgJT4lCiAgICAgIGFzX3RpYmJsZSgpCgogICAgcHJpbnQoaikKICB9CiAgU3lzLnNsZWVwKDEyMCkKfQpgYGAKCmBgYHtyfQpjb21wbGV0ZV9ocmVmX2RpbmEgPC0gaHJlZl9kaW5hICU+JSAKICBiaW5kX3Jvd3MoKSAlPiUgCiAgbXV0YXRlKAogICAgdGVzaXNfdXJsID0gcGFzdGUwKCJodHRwczovL2FsaWNpYS5jb25jeXRlYy5nb2IucGUiLAogICAgICAgICAgICAgICAgICAgICAgIHZhbHVlKQogICkgJT4lIAogIHNlbGVjdCgtdmFsdWUpCgpjb21wbGV0ZV9ocmVmX2RpbmEKYGBgCgojIExvb3AgZm9yIHJlYWQgYWxsIHRoZXNpcyAKCiMjIFJlYWQgaHRtbCBvZiBlYWNoIHRoZXNpcwoKYGBge3J9CnJlYWRfaHRtbF90aGVzaXMgPC0gdmVjdG9yKCJsaXN0IiwgbnJvdyhjb21wbGV0ZV9ocmVmX2RpbmEpKQoKZm9yIChpIGluIHNlcV9sZW4obnJvdyhjb21w
bGV0ZV9ocmVmX2RpbmEpKSkgewogIGlmICghKGNvbXBsZXRlX2hyZWZfZGluYSR0ZXNpc191cmxbaV0gJWluJSBuYW1lcyhyZWFkX2h0bWxfdGhlc2lzKSkpIHsKICAgIGNhdChwYXN0ZSgiRG9pbmcgdGhlc2lzIG51bWJlciIsIGksICIuLi4iKSkKICAgIG9rIDwtIEZBTFNFCiAgICBjb3VudGVyIDwtIDAKICAgIHdoaWxlIChvayA9PSBGQUxTRSAmIGNvdW50ZXIgPD0gMTApIHsKICAgICAgY291bnRlciA8LSBjb3VudGVyICsgMQogICAgICBvdXQgPC0gdHJ5Q2F0Y2goCiAgICAgICAgZXhwciA9IHsKICAgICAgICAgIGNvbXBsZXRlX2hyZWZfZGluYSR0ZXNpc191cmxbaV0gJT4lIAogICAgICAgICAgICByZWFkX2h0bWwoKQogICAgICAgIH0sCiAgICAgICAgZXJyb3IgPSBmdW5jdGlvbihlKSB7CiAgICAgICAgICBTeXMuc2xlZXAoMikKICAgICAgICAgIGUKICAgICAgICB9CiAgICAgICkKICAgICAgaWYgKCJlcnJvciIgJWluJSBjbGFzcyhvdXQpKSB7CiAgICAgICAgY2F0KCIuIikKICAgICAgfSBlbHNlIHsKICAgICAgICBvayA8LSBUUlVFCiAgICAgICAgY2F0KCJTdWNjZXNzZnVsISIpCiAgICAgIH0KICAgIH0KICAgIGNhdCgiXG4iKQogICAgcmVhZF9odG1sX3RoZXNpc1tbaV1dIDwtIG91dAogICAgbmFtZXMocmVhZF9odG1sX3RoZXNpcylbaV0gPC0gY29tcGxldGVfaHJlZl9kaW5hJHRlc2lzX3VybFtpXQogIH0KfQpgYGAKCgojIyBFeHRyYWN0IGluZm9ybWF0aW9uCgpUaGlzIGZ1bmN0aW9uIGhlbHAgdG8gZXh0cmFjdCBpbmZvcm1hdGlvbiBhYm91dCB0aGVzaXMgbGlrZSB0aXRsZSwgYWJzdHJhY3QsIGV0Yy4KCmBgYHtyfQpleHRyYWN0X2luZm9ybWF0aW9uIDwtIGZ1bmN0aW9uKGh0bWwpIHsKICB0aXR1bG8gPC0gaHRtbCAlPiUgCiAgICBodG1sX2VsZW1lbnRzKCIubWVkaWEtYm9keSBoMSIpICU+JSAKICAgIGh0bWxfdGV4dDIoKSAlPiUgCiAgICB0aWJibGUoVGl0dWxvID0gLikKICAKICBpbmZvX2lkZW50IDwtIGh0bWwgJT4lICAKICAgIGh0bWxfdGFibGUoKSAlPiUgCiAgICBtYWdyaXR0cjo6ZXh0cmFjdDIoMSkgJT4lIAogICAgbXV0YXRlKFgxID0gc3RyX3JlbW92ZShYMSwgIjoiKSkgJT4lIAogICAgcGl2b3Rfd2lkZXIoCiAgICAgIG5hbWVzX2Zyb20gPSBYMSwKICAgICAgdmFsdWVzX2Zyb20gPSBYMgogICAgKQogIAogIHJlc3VtZW4gPC0gaHRtbCAlPiUgIAogICAgaHRtbF90YWJsZSgpICU+JSAKICAgIG1hZ3JpdHRyOjpleHRyYWN0MigyKSAlPiUgCiAgICBtdXRhdGUoWDEgPSBzdHJfcmVtb3ZlKFgxLCAiOiIpKSAlPiUgCiAgICBwaXZvdF93aWRlcigKICAgICAgbmFtZXNfZnJvbSA9IFgxLAogICAgICB2YWx1ZXNfZnJvbSA9IFgyCiAgICApCiAgCiAgaW5mb3JtYXRpb24gPC0gYmluZF9jb2xzKAogICAgaW5mb19pZGVudCwKICAgIHRpdHVsbywKICAgIHJlc3VtZW4KICApICU+JSAKICAgIHJlbG9jYXRlKFRpdHVsbywgLmFmdGVyID0gIkF1dG9yIFByaW5jaXBhbCIpCiAgCiAgcmV0dXJuKGluZm9ybWF0
aW9uKQp9CmBgYAoKYGBge3J9CnRoZXNpc19pbmZvcm1hdGlvbiA8LSB2ZWN0b3IoImxpc3QiLCBsZW5ndGgocmVhZF9odG1sX3RoZXNpcykpCgpmb3IgKGkgaW4gc2VxX2xlbihsZW5ndGgocmVhZF9odG1sX3RoZXNpcykpKSB7CiAgdGhlc2lzX2luZm9ybWF0aW9uW1tpXV0gPC0gZXh0cmFjdF9pbmZvcm1hdGlvbihyZWFkX2h0bWxfdGhlc2lzW1tpXV0pCn0KYGBgCgpKb2luIGZ1bGwgaW5mb3JtYXRpb24gYWJvdXQgdGhlIHRoZXNpcwoKYGBge3J9CnRoZXNpc19pbmZvcm1hdGlvbiA8LSBiaW5kX3Jvd3ModGhlc2lzX2luZm9ybWF0aW9uKSAlPiUgCiAgcmVsb2NhdGUoYE90cm9zIEF1dG9yZXNgLCAuYWZ0ZXIgPSAiQXV0b3IgUHJpbmNpcGFsIikKCnRoZXNpc19pbmZvcm1hdGlvbgpgYGAKCiMjIFJlbW92ZSBkdXBsaWNhdGVzCgpUaGUgdGFibGUgaGFzIGByIG5yb3codGhlc2lzX2luZm9ybWF0aW9uKWAgYXQgdGhlIG1vbWVudC4KCmBgYHtyfQp0aGVzaXNfZmluYWwgPC0gdGhlc2lzX2luZm9ybWF0aW9uICU+JSAKICBtdXRhdGUoCiAgICB0aXR1bG9fdG1wID0gc3RyX3RvX3VwcGVyKFRpdHVsbykKICApICU+JQogIGRpc3RpbmN0KHRpdHVsb190bXAsIC5rZWVwX2FsbCA9IFRSVUUpICU+JSAKICBzZWxlY3QoLWModGl0dWxvX3RtcCkpCmBgYAoKTm93LCBpdCBoYXMgYHIgbnJvdyh0aGVzaXNfZmluYWwpYC4KCiMgRXhwb3J0IHRvIFhMU1gKCmBgYHtyfQpvcGVueGxzeDo6d3JpdGUueGxzeCh0aGVzaXNfZmluYWwsCiAgICAgICAgICAgICAgICAgICAgICJUYWJsZSBjb21wbGV0IHRoZXNpcyBwc3ljaG9tZXRyaWMueGxzeCIpCmBgYAo=